# DESCRIPTIVE STATISTICS, CODE

library(plyr)
library(ggplot2)
library(zoo)
library(scales)
library(stringr)
library(dplyr)
library(glue)
library(tidyverse)
library(text2vec)
library(lubridate)
library(data.table)
library(quanteda)
library(quanteda.textstats)
library(rddtools)
library(magrittr)
library(stargazer)
library(lmtest)
library(fixest)
library(modelsummary)
library(readxl)
library(cowplot)
library(texreg)
library(rdd)

# LOAD DATA ----
# NOTE(review): placeholder path; the .rda file is expected to provide the
# data frame `df_final` used throughout this script.
load("path to data_final.rda")

# Drop rows coded as the speaker (party 1) and as process writing (party 0).
# subset() also drops rows where the condition is NA.
df_final <- subset(df_final, party != 1 & party != 0)

# Number of pattern matches in each row of `tokens_edit`, one column per
# term list.
neo_terms <- "markedsøkonomi|frie marked|markedsreform"
soc_terms <- "plantyranni|planøkono|kommandoøkono"
df_final$neo <- str_count(df_final$tokens_edit, neo_terms)
df_final$soc <- str_count(df_final$tokens_edit, soc_terms)

# OVERALL TABLE ----
# Rows per year over the whole data set; tabulated once and reused below.
year_counts <- table(df_final$year)
year_counts # auto-printed when the script is run at top level

num_rows <- nrow(df_final)
mean_rows_per_year <- mean(year_counts)
min_rows <- min(year_counts)
max_rows <- max(year_counts)
std_dev_rows_per_year <- sd(year_counts)

# One-row summary. check.names = TRUE is the data.frame() default, spelled out
# here because it turns the spaces in the quoted names into dots (for example
# `Number.of.rows`).
overall_summary <- data.frame(
  "Party or sub group" = "Overall",
  "Number of rows" = num_rows,
  "Mean rows per year" = mean_rows_per_year,
  "Minimum rows of any year" = min_rows,
  "Maximum rows of any year" = max_rows,
  "Standard deviation of rows per year" = std_dev_rows_per_year,
  check.names = TRUE
)

# Making overall table on party level ----

party_groups <- unique(df_final$party)

# Rows-per-year statistics for one party, returned as a one-row data frame
# with the same columns as `overall_summary`.
#
# The calculation lives in a function so that its intermediates stay local:
# the data-set-wide `num_rows`, `year_counts`, `mean_rows_per_year`,
# `min_rows`, `max_rows` and `std_dev_rows_per_year` computed for the overall
# table are no longer overwritten with a single party's values.
#
# party_group: one value of `df_final$party`.
summarise_party_rows <- function(party_group) {
  # Only the year column is needed, so no per-party copy of every column.
  party_years <- df_final$year[df_final$party == party_group]
  rows_per_year <- table(party_years)

  data.frame(
    "Party or sub group" = party_group,
    "Number of rows" = length(party_years),
    "Mean rows per year" = mean(rows_per_year),
    "Minimum rows of any year" = min(rows_per_year),
    "Maximum rows of any year" = max(rows_per_year),
    "Standard deviation of rows per year" = sd(rows_per_year)
  )
}

# Collect the one-row summaries in a preallocated list and bind them once,
# instead of growing a data frame inside the loop. A `for` loop is kept so
# that each party value is passed on exactly as `for` yields it (a factor is
# coerced to character strings).
party_summaries <- vector("list", length(party_groups))
slot <- 0L
for (party_group in party_groups) {
  slot <- slot + 1L
  party_summaries[[slot]] <- summarise_party_rows(party_group)
}
combined_summary <- bind_rows(party_summaries)

# Overall row first, then one row per party.
descr_overall <- rbind(overall_summary, combined_summary)

# CONTROLS AND IVS ----

# Total, mean, minimum, maximum and standard deviation of `value` for each
# level of `variable` in a long-format table; missing values are ignored.
describe_long_values <- function(long_data) {
  long_data %>%
    group_by(variable) %>%
    summarise(
      Total = sum(value, na.rm = TRUE),
      Mean = mean(value, na.rm = TRUE),
      Minimum = min(value, na.rm = TRUE),
      Maximum = max(value, na.rm = TRUE),
      SD = sd(value, na.rm = TRUE)
    )
}

# IVs: yearly totals of the two term counts ...
summary_table_iv <- df_final %>%
  group_by(year) %>%
  summarise(
    neo_sum = sum(neo, na.rm = TRUE),
    soc_sum = sum(soc, na.rm = TRUE)
  )

# ... and statistics of those yearly totals across years.
summary_stats_table_iv <- summary_table_iv %>%
  pivot_longer(cols = c(neo_sum, soc_sum), names_to = "variable") %>%
  describe_long_values()

# Controls: statistics taken over every row of df_final (not per year).
df_long <- df_final %>%
  pivot_longer(cols = c(quarterly_growth, unemp_month), names_to = "variable")

summary_stats_table_controls <- describe_long_values(df_long)

# IV rows first, then control rows.
summary_stats_all <- rbind(summary_stats_table_iv, summary_stats_table_controls)

# FIGURE OF PARTIES AND SPEECHES PER YEAR ----

# Speeches (rows) per party and year. `.groups = "drop_last"` is what
# summarise() does by default here (result stays grouped by party); stating it
# silences the regrouping message.
party_year_counts <- df_final %>%
  group_by(party, year) %>%
  summarise(count = n(), .groups = "drop_last")

# Same counts with one column per year; party-years without rows become 0.
party_year_counts_wide <- party_year_counts %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0)

# Attach a readable label to the party codes that are plotted; any other code
# keeps its value as text.
grouped_party_df <- party_year_counts %>%
  mutate(
    grouped_party = case_when(
      party == 2 ~ "Social Democrats",
      party == 3 ~ "Conservatives",
      party == 4 ~ "Liberals",
      party == 5 ~ "Social Liberals",
      party == 6 ~ "Soc. People's Party",
      party == 7 ~ "Progress Party",
      party == 10 ~ "Centre Democrats",
      party == 12 ~ "Christian Democrats",
      TRUE ~ as.character(party)
    )
  )

# Line colour per plotted party. The names of this vector are also the list
# of parties to plot, so the two cannot drift apart.
party_colors <- c(
  "Social Democrats" = "black",
  "Conservatives" = "darkgrey",
  "Liberals" = "lightgrey",
  "Social Liberals" = "grey",
  "Soc. People's Party" = "blue",
  "Progress Party" = "red",
  "Centre Democrats" = "green",
  "Christian Democrats" = "purple"
)
parties_to_plot <- names(party_colors)

# Every party is drawn with a solid line.
party_linetypes <- rep("solid", length(parties_to_plot))

# Keep only the labelled parties.
filtered_df <- grouped_party_df %>%
  filter(grouped_party %in% parties_to_plot)

# Total per year and party label, ready for plotting.
summarized_df <- filtered_df %>%
  group_by(year, grouped_party) %>%
  summarise(total_count = sum(count), .groups = "drop") %>%
  # Going through as.character() keeps the real year values even if `year`
  # is a factor (as.numeric() on a factor would return the level codes).
  mutate(year = as.numeric(as.character(year))) %>%
  rename(Party = grouped_party)

# One line per party: number of speeches by year.
party_speeches_plot <- ggplot(
  summarized_df,
  aes(x = year, y = total_count, color = Party, linetype = Party)
) +
  # `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0).
  geom_line(linewidth = 0.75) +
  scale_color_manual(values = party_colors) +
  scale_linetype_manual(values = party_linetypes) +
  labs(
    x = "Year",
    y = "Number of speeches",
    color = "Party",
    linetype = "Party"
  ) +
  theme_minimal()

# Graphs ----

# English month abbreviations on the date axes. The "C" locale is used
# because it is accepted on every platform, whereas "English" is a
# Windows-only locale name that fails with a warning elsewhere.
Sys.setlocale("LC_TIME", "C")

# Monthly totals of the `word` column.
#
# data: data frame with a `yearmon` column ("YYYY-MM" strings) and a numeric
#       `word` column.
# Returns a data frame with one row per month present in `data`: `yearmon`
# (Date, first day of the month) and `word` (sum over that month).
monthly_term_totals <- function(data) {
  monthly <- aggregate(word ~ yearmon, data = data, FUN = sum)
  monthly$yearmon <- as.Date(paste0(monthly$yearmon, "-01"))
  monthly
}

# Scatter plot of monthly term frequency with a dashed marker at an event date.
#
# monthly:     output of monthly_term_totals().
# title:       plot title.
# event_date:  Date at which the vertical dashed line is drawn.
# event_label: text placed next to that line.
# x_limits:    length-2 Date vector with the x-axis limits.
# break_every: an axis label is drawn for every `break_every`-th month in
#              `monthly`, starting with the first.
# Returns a ggplot object.
plot_term_frequency <- function(monthly, title,
                                event_date = as.Date("1989-11-01"),
                                event_label = "November 1989",
                                x_limits = as.Date(c("1980-01-01", "2000-01-01")),
                                break_every = 16) {
  row_index <- seq_len(nrow(monthly))
  label_breaks <- monthly$yearmon[(row_index - 1) %% break_every == 0]

  ggplot(monthly, aes(x = yearmon, y = word)) +
    geom_point() +
    scale_x_date(
      date_labels = "%b-%Y",
      breaks = label_breaks,
      limits = x_limits
    ) +
    # Demarcation line at the event date.
    geom_vline(
      xintercept = as.numeric(event_date),
      linetype = "dashed",
      color = "black"
    ) +
    # Label just left of the line, near the top of the plotted values.
    annotate(
      "text",
      x = event_date, y = max(monthly$word) * 0.9,
      label = event_label, vjust = -1, hjust = 1.1, size = 3.5
    ) +
    labs(title = title, x = "Year and month", y = "Term Frequency") +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5), # centred title
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 10),
      legend.position = "none",
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank()
    )
}

df_final$yearmon <- format(df_final$date, "%Y-%m")

# Neoliberalism
# `df_final$neo` already holds the per-row counts for this term list, so the
# regular expression is not run over the text a second time.
df_final$word <- df_final$neo
bymonthyear_word <- monthly_term_totals(df_final)
plot_neo <- plot_term_frequency(bymonthyear_word, "Neoliberalism")

# Discrediting socialism
# Likewise, `df_final$soc` holds the per-row counts for this term list.
df_final$word <- df_final$soc
bymonthyear_word <- monthly_term_totals(df_final)
plot_soc <- plot_term_frequency(bymonthyear_word, "Discrediting socialism")

# Both panels side by side, aligned on their bottom axis.
plot_grid_descr <- plot_grid(
  plot_neo, plot_soc,
  nrow = 1, ncol = 2, align = "h", axis = "b"
)
